{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "dir_content = os.listdir()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "dir_content = [filename for filename in dir_content if filename.find('pong') > -1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['1524378176-pong-ram-deterministic-v0.csv']"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dir_content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(dir_content[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>actions-input</th>\n",
       "      <th>rewards</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0  actions-input  rewards\n",
       "0           0              0      0.0\n",
       "1           1              0      0.0\n",
       "2           2              0      0.0\n",
       "3           3              3      0.0\n",
       "4           4              2      0.0"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>actions-input</th>\n",
       "      <th>rewards</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>216</th>\n",
       "      <td>216</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>997</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3010</th>\n",
       "      <td>3010</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3192</th>\n",
       "      <td>3192</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3410</th>\n",
       "      <td>3410</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5168</th>\n",
       "      <td>5168</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6193</th>\n",
       "      <td>6193</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7740</th>\n",
       "      <td>7740</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9617</th>\n",
       "      <td>9617</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10355</th>\n",
       "      <td>10355</td>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10924</th>\n",
       "      <td>10924</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11032</th>\n",
       "      <td>11032</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11286</th>\n",
       "      <td>11286</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12018</th>\n",
       "      <td>12018</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12896</th>\n",
       "      <td>12896</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15365</th>\n",
       "      <td>15365</td>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Unnamed: 0  actions-input  rewards\n",
       "216           216              3      1.0\n",
       "997           997              3      1.0\n",
       "3010         3010              0      1.0\n",
       "3192         3192              1      1.0\n",
       "3410         3410              0      1.0\n",
       "5168         5168              3      1.0\n",
       "6193         6193              5      1.0\n",
       "7740         7740              4      1.0\n",
       "9617         9617              5      1.0\n",
       "10355       10355              2      1.0\n",
       "10924       10924              0      1.0\n",
       "11032       11032              3      1.0\n",
       "11286       11286              0      1.0\n",
       "12018       12018              3      1.0\n",
       "12896       12896              0      1.0\n",
       "15365       15365              2      1.0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.rewards > 0.0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>actions-input</th>\n",
       "      <th>rewards</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>22</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>23</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>24</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>26</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>27</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>28</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>29</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>187</th>\n",
       "      <td>187</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>188</th>\n",
       "      <td>188</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>189</th>\n",
       "      <td>189</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>190</th>\n",
       "      <td>190</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>191</th>\n",
       "      <td>191</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192</th>\n",
       "      <td>192</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>193</th>\n",
       "      <td>193</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>194</th>\n",
       "      <td>194</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>195</th>\n",
       "      <td>195</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <td>196</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>197</th>\n",
       "      <td>197</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>198</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199</th>\n",
       "      <td>199</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>200</th>\n",
       "      <td>200</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>201</th>\n",
       "      <td>201</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>202</th>\n",
       "      <td>202</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>203</th>\n",
       "      <td>203</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>204</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>205</th>\n",
       "      <td>205</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>206</th>\n",
       "      <td>206</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>207</th>\n",
       "      <td>207</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>208</th>\n",
       "      <td>208</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>209</th>\n",
       "      <td>209</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>210</th>\n",
       "      <td>210</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>211</th>\n",
       "      <td>211</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>212</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>213</th>\n",
       "      <td>213</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>214</th>\n",
       "      <td>214</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>215</th>\n",
       "      <td>215</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>216</th>\n",
       "      <td>216</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>217 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0  actions-input  rewards\n",
       "0             0              0      0.0\n",
       "1             1              0      0.0\n",
       "2             2              0      0.0\n",
       "3             3              3      0.0\n",
       "4             4              2      0.0\n",
       "5             5              5      0.0\n",
       "6             6              0      0.0\n",
       "7             7              3      0.0\n",
       "8             8              0      0.0\n",
       "9             9              1      0.0\n",
       "10           10              2      0.0\n",
       "11           11              3      0.0\n",
       "12           12              3      0.0\n",
       "13           13              0      0.0\n",
       "14           14              2      0.0\n",
       "15           15              1      0.0\n",
       "16           16              0      0.0\n",
       "17           17              0      0.0\n",
       "18           18              5      0.0\n",
       "19           19              2      0.0\n",
       "20           20              0      0.0\n",
       "21           21              4      0.0\n",
       "22           22              1      0.0\n",
       "23           23              3      0.0\n",
       "24           24              0      0.0\n",
       "25           25              0      0.0\n",
       "26           26              0      0.0\n",
       "27           27              5      0.0\n",
       "28           28              2      0.0\n",
       "29           29              5      0.0\n",
       "..          ...            ...      ...\n",
       "187         187              3      0.0\n",
       "188         188              3      0.0\n",
       "189         189              3      0.0\n",
       "190         190              0      0.0\n",
       "191         191              1      0.0\n",
       "192         192              0      0.0\n",
       "193         193              1      0.0\n",
       "194         194              5      0.0\n",
       "195         195              3      0.0\n",
       "196         196              0      0.0\n",
       "197         197              3      0.0\n",
       "198         198              2      0.0\n",
       "199         199              1      0.0\n",
       "200         200              1      0.0\n",
       "201         201              1      0.0\n",
       "202         202              5      0.0\n",
       "203         203              5      0.0\n",
       "204         204              0      0.0\n",
       "205         205              5      0.0\n",
       "206         206              5      0.0\n",
       "207         207              4      0.0\n",
       "208         208              5      0.0\n",
       "209         209              0      0.0\n",
       "210         210              4      0.0\n",
       "211         211              3      0.0\n",
       "212         212              3      0.0\n",
       "213         213              1      0.0\n",
       "214         214              5      0.0\n",
       "215         215              3      0.0\n",
       "216         216              3      1.0\n",
       "\n",
       "[217 rows x 3 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(217)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "first_sequence = df['actions-input'][:317]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[1, 0, 1, 0],\n",
       " [1, 4, 4, 1],\n",
       " [5, 5, 0, 2],\n",
       " [2, 5, 1, 0],\n",
       " [2, 4, 0, 3],\n",
       " [3, 3, 5, 1],\n",
       " [2, 4, 0, 5],\n",
       " [1, 2, 3, 4],\n",
       " [4, 4, 0, 2],\n",
       " [1, 1, 4, 2],\n",
       " [0, 1, 2, 0]]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list_sequence = first_sequence.tolist()\n",
    "treated_sequence = []\n",
    "start = 0\n",
    "incremental= 4\n",
    "limit = 4\n",
    "reversed_sequence = list_sequence[::-1]\n",
    "#reversed_sequence[start:limit]\n",
    "#reversed_sequence[limit+4:]\n",
    "reversed_sequence_len = lambda: range(len(reversed_sequence))\n",
    "for _ in reversed_sequence_len():\n",
    "    done = not reversed_sequence[start:limit]\n",
    "    if not done: treated_sequence.append(reversed_sequence[start:limit])\n",
    "    reversed_sequence = reversed_sequence[limit+incremental:]\n",
    "    start = start + incremental\n",
    "    limit = limit + incremental\n",
    "    if done: break\n",
    "treated_sequence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Produced length of output: 11\n"
     ]
    }
   ],
   "source": [
    "print('Produced length of output: {}'.format(len(treated_sequence)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "actions = np.array(treated_sequence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1, 0, 1, 0],\n",
       "       [1, 4, 4, 1],\n",
       "       [5, 5, 0, 2],\n",
       "       [2, 5, 1, 0],\n",
       "       [2, 4, 0, 3],\n",
       "       [3, 3, 5, 1],\n",
       "       [2, 4, 0, 5],\n",
       "       [1, 2, 3, 4],\n",
       "       [4, 4, 0, 2],\n",
       "       [1, 1, 4, 2],\n",
       "       [0, 1, 2, 0]])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "actions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "second_sequence = df['rewards'][:317]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[-1.0, 0.0, 0.0, 0.0],\n",
       " [0.0, 0.0, 0.0, 0.0],\n",
       " [0.0, 0.0, 0.0, 0.0],\n",
       " [0.0, 0.0, 0.0, 0.0],\n",
       " [0.0, 0.0, 0.0, 0.0],\n",
       " [1.0, 0.0, 0.0, 0.0],\n",
       " [0.0, 0.0, 0.0, 0.0],\n",
       " [0.0, 0.0, 0.0, 0.0],\n",
       " [0.0, 0.0, 0.0, 0.0],\n",
       " [0.0, -1.0, 0.0, 0.0],\n",
       " [0.0, 0.0, 0.0, 0.0]]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list_sequence = second_sequence.tolist()\n",
    "treated_sequence = []\n",
    "start = 0\n",
    "incremental = 4\n",
    "limit = 4\n",
    "reversed_sequence = list_sequence[::-1]\n",
    "reversed_sequence_len = lambda: range(len(reversed_sequence))\n",
    "for _ in reversed_sequence_len():\n",
    "    done = not reversed_sequence[start:limit]\n",
    "    if not done: treated_sequence.append(reversed_sequence[start:limit])\n",
    "    reversed_sequence = reversed_sequence[limit+incremental:]\n",
    "    start = start + incremental\n",
    "    limit = limit + incremental\n",
    "    if done: break\n",
    "treated_sequence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Produced length of output: 11\n"
     ]
    }
   ],
   "source": [
    "print('Produced length of output: {}'.format(len(treated_sequence)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "rewards = np.array(treated_sequence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 1.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0., -1.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.]])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rewards"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings; warnings.simplefilter('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "from keras.models import Sequential"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.layers import Dense"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.layers import Activation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import model_selection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "x, x_val, y, y_val = model_selection.train_test_split(rewards, actions, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = Sequential()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.add(Dense(4, input_shape=(4,)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.add(Activation('relu'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.add(Dense(4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.add(Activation('sigmoid'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.add(Dense(4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(optimizer='rmsprop', loss='mse')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 8 samples, validate on 3 samples\n",
      "Epoch 1/10\n",
      "8/8 [==============================] - 0s 23ms/step - loss: 6.8860 - val_loss: 3.8338\n",
      "Epoch 2/10\n",
      "8/8 [==============================] - 0s 349us/step - loss: 6.8483 - val_loss: 3.8134\n",
      "Epoch 3/10\n",
      "8/8 [==============================] - 0s 379us/step - loss: 6.8212 - val_loss: 3.7964\n",
      "Epoch 4/10\n",
      "8/8 [==============================] - 0s 284us/step - loss: 6.7985 - val_loss: 3.7815\n",
      "Epoch 5/10\n",
      "8/8 [==============================] - 0s 295us/step - loss: 6.7785 - val_loss: 3.7679\n",
      "Epoch 6/10\n",
      "8/8 [==============================] - 0s 418us/step - loss: 6.7602 - val_loss: 3.7552\n",
      "Epoch 7/10\n",
      "8/8 [==============================] - 0s 343us/step - loss: 6.7432 - val_loss: 3.7432\n",
      "Epoch 8/10\n",
      "8/8 [==============================] - 0s 430us/step - loss: 6.7271 - val_loss: 3.7318\n",
      "Epoch 9/10\n",
      "8/8 [==============================] - 0s 387us/step - loss: 6.7117 - val_loss: 3.7209\n",
      "Epoch 10/10\n",
      "8/8 [==============================] - 0s 597us/step - loss: 6.6969 - val_loss: 3.7103\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7f5ab281db70>"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(x, y, epochs=10, validation_data=(x_val, y_val))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8/8 [==============================] - 0s 133us/step\n"
     ]
    }
   ],
   "source": [
    "score = model.evaluate(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6.6825456619262695"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.37288553,  0.08706434,  0.39861295,  0.03514653],\n",
       "       [ 0.8745328 ,  0.5887309 ,  0.8690586 , -0.295183  ]],\n",
       "      dtype=float32)"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.predict(np.array([[2,5,1,0],[1,1,4,2]]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!python ../app.py --environment_name \"Pong-v0\" --output_stats_filename \"pong-v0\""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
